By Jack Wilburn under the supervision of Jaimi Butler

Libraries

library(tidyverse)
library(lubridate)
library(imager)
library(stringi)
library(reshape)

Import

# Read the raw training labels, skipping the first line of the CSV
train <- read_csv(file = "Training_data.csv", skip = 1)

Clean

# Replace the imported headers with short, consistent column names
names(train) <- c(
  "file", "time", "weather", "smoke", "pelicans", "pods", "npods",
  "disturbance", "stageofnesting", "preds", "abandon", "pretty"
)

# Drop the last character of every file name (a stray apostrophe)
train$file <- substr(train$file, start = 1, stop = nchar(train$file) - 1)

# Turn every column except file (1) and npods (7) into a factor
train[, c(2:6, 8:12)] <- lapply(train[, c(2:6, 8:12)], factor)

# Collapse the five raw weather levels into cloud / rain / sun.
# NOTE(review): this mapping is positional, so it relies on the raw data
# having exactly five levels in this sorted order -- confirm against the CSV.
levels(train$weather) <- c("cloud", "cloud", "cloud", "rain", "sun")

# NAs are left as they are; no recoding is performed in this chunk.

# Build a date-time column for time series: strip the 4-character file
# extension from the name and parse the remaining timestamp
train$date <- ymd_hms(
  substr(train$file, start = 1, stop = nchar(train$file) - 4)
)

# Look at progress
glimpse(train)
Observations: 1,011
Variables: 13
$ file           <chr> "20170309002000.jpg", "20170309014000.jpg", "20170309091500.jpg", "20170309162000.j...
$ time           <fct> day, day, night, day, day, night, day, night, day, day, night, night, day, day, day...
$ weather        <fct> sun, NA, NA, sun, sun, NA, cloud, NA, sun, cloud, NA, NA, sun, sun, sun, NA, NA, NA...
$ smoke          <fct> FALSE, NA, NA, FALSE, FALSE, NA, FALSE, NA, FALSE, FALSE, NA, NA, FALSE, FALSE, FAL...
$ pelicans       <fct> FALSE, NA, NA, FALSE, FALSE, NA, FALSE, NA, TRUE, TRUE, NA, NA, TRUE, TRUE, FALSE, ...
$ pods           <fct> FALSE, NA, NA, FALSE, FALSE, NA, FALSE, NA, TRUE, TRUE, NA, NA, TRUE, TRUE, FALSE, ...
$ npods          <int> NA, NA, NA, NA, NA, NA, NA, NA, 1, 1, NA, NA, 1, 1, NA, NA, NA, NA, NA, NA, NA, NA,...
$ disturbance    <fct> TRUE, NA, NA, FALSE, FALSE, NA, FALSE, NA, FALSE, FALSE, NA, NA, FALSE, FALSE, FALS...
$ stageofnesting <fct> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
$ preds          <fct> FALSE, NA, NA, FALSE, FALSE, NA, FALSE, NA, FALSE, FALSE, NA, NA, FALSE, FALSE, FAL...
$ abandon        <fct> NA, NA, NA, NA, NA, NA, NA, NA, FALSE, FALSE, NA, NA, FALSE, FALSE, NA, NA, NA, NA,...
$ pretty         <fct> NA, NA, NA, FALSE, FALSE, NA, FALSE, NA, FALSE, FALSE, NA, NA, FALSE, FALSE, FALSE,...
$ date           <dttm> 2017-03-09 00:20:00, 2017-03-09 01:40:00, 2017-03-09 09:15:00, 2017-03-09 16:20:00...
# Remove uninformative variables: keep columns 1-8, 12 and 13, which drops
# stageofnesting, preds and abandon (positions 9-11)
train <- dplyr::select(train, 1:8, 12, 13)

Output Cleaned Data for Algorithm Building

# Save the cleaned table next to the notebook
readr::write_csv(train, "train_clean.csv")

Pelicans

Let’s take a look at when the pelicans were first seen on Lambourne Bay and when they left.

# Pelican flag over time, using only rows where the pelicans label is present
ggplot(
  data = subset(train, !is.na(pelicans)),
  mapping = aes(x = date, y = pelicans)
) +
  geom_point() +
  ggtitle("Pelican Sightings: 2017") +
  xlab("Date") +
  ylab("Pelicans")

We can see that the pelicans came to the bay around the start of April and left at around the start of October. Let’s get even more specific:

# Timestamps of every photo in which pelicans were recorded.
# which() keeps only the positions where the comparison is TRUE, so rows with
# a missing pelicans label are dropped up front instead of being returned as
# all-NA rows; na.omit() then removes any sighting whose timestamp is itself
# missing. (The original ran this assignment twice; once is enough.)
sightings <- na.omit(train$date[which(train$pelicans == TRUE)])

# Earliest ten sightings
head(sightings, 10)
 [1] "2017-03-11 14:50:00 UTC" "2017-03-11 23:55:00 UTC" "2017-03-12 14:00:00 UTC" "2017-03-12 16:40:00 UTC"
 [5] "2017-03-30 23:00:00 UTC" "2017-03-31 02:10:00 UTC" "2017-03-31 13:25:00 UTC" "2017-03-31 15:00:00 UTC"
 [9] "2017-04-01 00:10:00 UTC" "2017-04-01 14:05:00 UTC"

We can see that there were some sightings in mid-March, but the bulk of the sightings started late on 3/30 and continued into early 3/31.

# Last ten entries of sightings
tail(sightings, n = 10)
 [1] "2017-07-25 23:35:00 UTC" "2017-07-26 01:25:00 UTC" "2017-07-26 02:20:00 UTC" "2017-07-26 03:05:00 UTC"
 [5] "2017-07-26 12:25:00 UTC" "2017-07-26 16:05:00 UTC" "2017-07-26 18:40:00 UTC" "2017-07-27 13:40:00 UTC"
 [9] "2017-07-27 15:35:00 UTC" "2017-08-04 19:00:00 UTC"

We can see that there was a sighting on 8/4 but the bulk of the sightings stopped on 7/27.

These are just the sightings for Lambourne Bay, so it’s conceivable that the birds were still on the island but had moved away from this bay specifically. Ultimately, I believe the period during which the pelicans were on Lambourne Bay in 2017 was from 3/30 to 7/27.

Pods

Now let’s look at the number of pods on the bay over time and see how they changed.

# Pod counts over time with a smoothed trend line (no confidence band);
# the y axis is limited to 0-10 and the x axis is labelled once per month
ggplot(data = train, mapping = aes(x = date, y = npods)) +
  geom_point(alpha = 0.25) +
  geom_smooth(se = FALSE) +
  scale_x_datetime(date_breaks = "1 month", date_labels = "%b") +
  scale_y_continuous(limits = c(0, 10)) +
  ggtitle("Number of Pelican Pods: 2017") +
  xlab("Date") +
  ylab("Number of Pods")

We see that the number of pods increased in March and April from 0 pods to about 7 or 8. That number then declined to 1-3 by mid-June and stayed constant until the pelicans left Lambourne Bay.

View Pretty Photos

# Rows flagged as "pretty" photos (subset() drops rows where the flag is NA)
pretty <- subset(train, train$pretty == TRUE)

# Relative path of each flagged photo. file.path() inserts the "/" separator
# directly and returns an empty vector when no rows are flagged.
pimg <- file.path("PeliPhotos1Folder", pretty$file)

# Load every flagged photo rather than a hard-coded count of 14, so the chunk
# keeps working when the number of flagged rows changes.
# NOTE: the variable deliberately keeps its original name, which is the same
# as imager's imlist() function.
imlist <- as.imlist(lapply(pimg, load.image))

# Display the photos one at a time
for (i in seq_along(imlist)) {
  plot(imlist[i])
}

Weather

# Weather category of each photo over time
ggplot(data = train, mapping = aes(x = date, y = weather)) +
  geom_point()

Smoke

# Rows flagged as containing smoke (subset() drops rows where the flag is NA)
smoke <- subset(train, train$smoke == TRUE)

# Relative path of each smoke photo. file.path() inserts the "/" separator
# directly and returns an empty vector when no rows are flagged.
simg <- file.path("PeliPhotos1Folder", smoke$file)

# Load every flagged photo rather than exactly two, so the chunk keeps
# working when the number of smoke rows changes.
# NOTE: the variable deliberately keeps its original name, which is the same
# as imager's imlist() function.
imlist <- as.imlist(lapply(simg, load.image))
JPEG decompression: Premature end of JPEG file
#imlist = as.data.frame(imlist)
# Display each loaded image in turn; seq_along() follows the actual length of
# imlist instead of assuming exactly two images
for (i in seq_along(imlist)) {
  plot(imlist[i])
}

Disturbance

Notes

The abandonment (`abandon`), `stageofnesting`, and predators (`preds`) columns are all FALSE or NA and are, therefore, of little use. Thus, I removed them from the data set before the analysis in Python.

LS0tCnRpdGxlOiAiQW5hbHlzaXMgb2YgdGhlIFRyYWluaW5nIERhdGEiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCiMjIyBCeSBKYWNrIFdpbGJ1cm4gdW5kZXIgdGhlIHN1cGVydmlzaW9uIG9mIEphaW1pIEJ1dGxlcgoKIyMjIExpYnJhcmllcwoKYGBge3IsIG1lc3NhZ2UgPSBGQUxTRSwgd2FybmluZyA9IEZBTFNFfQpsaWJyYXJ5KHRpZHl2ZXJzZSkKbGlicmFyeShsdWJyaWRhdGUpCmxpYnJhcnkoaW1hZ2VyKQpsaWJyYXJ5KHN0cmluZ2kpCmxpYnJhcnkocmVzaGFwZSkKYGBgCgojIyMgSW1wb3J0CgpgYGB7ciwgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0KdHJhaW4gPSByZWFkX2NzdigiVHJhaW5pbmdfZGF0YS5jc3YiLCBza2lwID0gMSkKYGBgCgojIyMgQ2xlYW4KCmBgYHtyfQojIEZpeCB2YXJpYWJsZSBuYW1lcwpuYW1lcyh0cmFpbikgPSBjKCJmaWxlIiwgInRpbWUiLCAid2VhdGhlciIsICJzbW9rZSIsICJwZWxpY2FucyIsICJwb2RzIiwgIm5wb2RzIiwgImRpc3R1cmJhbmNlIiwgInN0YWdlb2ZuZXN0aW5nIiwgInByZWRzIiwgImFiYW5kb24iLCAicHJldHR5IikKCiMgUmVtb3ZlIGFwcG9zdHJvcGhlIGZyb20gZmlsZQp0cmFpbiRmaWxlID0gc3Vic3RyKHRyYWluJGZpbGUsMSxuY2hhcih0cmFpbiRmaWxlKS0xKQoKIyBGYWN0b3IgYW5kIGNsZWFuIGZhY3RvcnMKdHJhaW5bLGMoMjo2LDg6MTIpXSA8LSBsYXBwbHkodHJhaW5bLGMoMjo2LDg6MTIpXSwgZmFjdG9yKQpsZXZlbHModHJhaW4kd2VhdGhlcikgPSBjKCJjbG91ZCIsICJjbG91ZCIsICJjbG91ZCIsICJyYWluIiwgInN1biIpCgojIENyZWF0ZSBhIGRhdGUgdmFyaWFibGUgZm9yIHRpbWUgc2VyaWVzCnRyYWluJGRhdGUgPSBzdWJzdHIodHJhaW4kZmlsZSwxLG5jaGFyKHRyYWluJGZpbGUpLTQpCnRyYWluJGRhdGUgPSB5bWRfaG1zKHRyYWluJGRhdGUpCgojIFJlbW92ZSB1bmluZm9ybWF0aXZlIHZhcmlhYmxlcwp0cmFpbiA9IHRyYWluWyxjKDE6OCwxMiwxMyldCgojIFZpZXcgd2hhdCdzIGxlZnQKZ2xpbXBzZSh0cmFpbikKYGBgCgojIyMgT3V0cHV0IENsZWFuZWQgRGF0YSBmb3IgQWxnb3JpdGhtIEJ1aWxkaW5nCgpgYGB7cn0Kd3JpdGVfY3N2KHRyYWluLCAidHJhaW5fY2xlYW4uY3N2IikKYGBgCgojIyMgUGVsaWNhbnMKCkxldCdzIHRha2UgYSBsb29rIGF0IHdoZW4gdGhlIHBlbGljYW5zIHdlcmUgZmlyc3Qgc2VlbiBvbiBMYW1ib3VybmUgQmF5IGFuZCB3aGVuIHRoZXkgbGVmdC4KCmBgYHtyfQpnZ3Bsb3QodHJhaW5bIShpcy5uYSh0cmFpbiRwZWxpY2FucykpLF0sIGFlcyh4ID0gZGF0ZSwgeSA9IHBlbGljYW5zKSkgKyAKICBnZW9tX3BvaW50KCkgKwogIGxhYnModGl0bGUgPSAiUGVsaWNhbiBTaWdodGluZ3M6IDIwMTciLAogICAgICAgeSA9ICJQZWxpY2FucyIsCiAgICAgICB4ID0gIkRhdGUiKQpgYGAKCldlIGNhbiBzZWUgdGhhdCB0aGUgcGVsaWNhbnMgY2FtZSB0byB0aGUgYmF5IGFyb3VuZCB0aGUgc3RhcnQgb2YgQXByaWwgYW5k
IGxlZnQgYXQgYXJvdW5kIHRoZSBzdGFydCBvZiBPY3RvYmVyLiBMZXQncyBnZXQgZXZlbiBtb3JlIHNwZWNpZmljOgoKYGBge3J9CnNpZ2h0aW5ncyA9IG5hLm9taXQodHJhaW5bdHJhaW4kcGVsaWNhbnMgPT0gVFJVRSxdJGRhdGUpCmhlYWQoc2lnaHRpbmdzLCAxMCkKYGBgCgpXZSBjYW4gc2VlIHRoYXQgdGhlcmUgd2VyZSBzb21lIHNpZ2h0aW5ncyBpbiBtaWQgTWFyY2ggYnV0IHRoZSBidWxrIG9mIHRoZSBzaWdodGluZ3Mgc3RhcnRlZCB0byBsYXRlIG9uIDMvMzAgYW5kIGludG8gZWFybHkgMy8zMS4KCmBgYHtyfQp0YWlsKHNpZ2h0aW5ncywgMTApCmBgYAoKV2UgY2FuIHNlZSB0aGF0IHRoZXJlIHdhcyBhIHNpZ2h0aW5nIG9uIDgvNCBidXQgdGhlIGJ1bGsgb2YgdGhlIHNpZ2h0aW5ncyBzdG9wcGVkIG9uIDcvMjcuCgpUaGVzZSBhcmUganVzdCB0aGUgc2lnaHRpbmdzIGZvciBMYW1ib3VybmUgQmF5IHNvIGl0J3MgY29uY2VpdmFibGUgdGhhdCB0aGUgYmlyZHMgd2VyZSBzdGlsbCBvbiB0aGUgaXNsYW5kIGJ1dCBoYWQgbW92ZSBmcm9tIHRoaXMgYmF5IHNwZWNpZmljYWxseS4gVWx0aW1hdGVseSwgSSBiZWxpZXZlIHRoZSB0aW1lIHRoYXQgdGhlIHBlbGljYW5zIHdlcmUgb24gTGFtYm91cm5lIEJheSBpbiAyMDE3IHdlcmUgZnJvbSAzLzMwIHRvIDcvMjcuIAoKIyMjIFBvZHMKCk5vdyBsZXQncyBsb29rIGF0IHRoZSBudW1iZXIgb2YgcG9kcyBvbiB0aGUgYmF5IG92ZXIgdGltZSBhbmQgc2VlIGhvdyB0aGV5IGNoYW5nZWQuIAoKYGBge3IsIG1lc3NhZ2UgPSBGQUxTRSwgd2FybmluZyA9IEZBTFNFfQpnZ3Bsb3QodHJhaW4sIGFlcyh4ID0gZGF0ZSwgeSA9IG5wb2RzKSkgKyAKICBnZW9tX3BvaW50KGFscGhhID0gMC4yNSkgKyAKICBnZW9tX3Ntb290aChzZSA9IEZBTFNFKSArIAogIHNjYWxlX3lfY29udGludW91cyhsaW1pdHMgPSBjKDAsMTApKSArIAogIHNjYWxlX3hfZGF0ZXRpbWUoZGF0ZV9icmVha3MgPSAiMSBtb250aCIsIGRhdGVfbGFiZWxzID0gIiViIikgKyAKICBsYWJzKHRpdGxlID0gIk51bWJlciBvZiBQZWxpY2FuIFBvZHM6IDIwMTciLAogICAgICAgeCA9ICJEYXRlIiwKICAgICAgIHkgPSAiTnVtYmVyIG9mIFBvZHMiKQpgYGAKCldlIHNlZSB0aGF0IHRoZSBudW1iZXIgb2YgcG9kcyBpbmNyZWFzZWQgaW4gTWFyY2ggYW5kIEFwcmlsIGZyb20gMCBwb2RzIHRvIG91dCA3IG9yIDguIFRoYXQgbnVtYmVyIHRoZW4gZGVjbGluZWQgdG8gMS0zIGJ5IG1pZCBKdW5lLiBUaGF0IG51bWJlciB0aGVuIHN0YXllZCBjb25zdGFudCB1bnRpbCB0aGUgcGVsaWNhbnMgbGVmdCBMYW1ib3VybmUgQmF5LgoKIyMjIFZpZXcgUHJldHR5IFBob3RvcwoKYGBge3J9CnByZXR0eSA9IHN1YnNldCh0cmFpbiwgdHJhaW4kcHJldHR5ID09IFRSVUUpCnBpbWcgPSByZXAoTkEsIG5yb3cocHJldHR5KSkKZm9yIChpIGluIDE6bnJvdyhwcmV0dHkpKSB7CiAgcGltZ1tpXSA9IHN0cmlfcmVwbGFjZV9hbGxfZml4ZWQocGFzdGUoIlBlbGlQ
aG90b3MxRm9sZGVyLyIsIHByZXR0eSRmaWxlW2ldLCBjb2xsYXBzZSA9ICIiKSwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgcGF0dGVybiA9ICIgIiwgcmVwbGFjZW1lbnQgPSAiIikKfQoKaW1saXN0ID0gaW1saXN0KGxvYWQuaW1hZ2UocGltZ1sxXSksIGxvYWQuaW1hZ2UocGltZ1syXSksIGxvYWQuaW1hZ2UocGltZ1szXSksIGxvYWQuaW1hZ2UocGltZ1s0XSksCiAgICAgICAgICAgICAgICBsb2FkLmltYWdlKHBpbWdbNV0pLCBsb2FkLmltYWdlKHBpbWdbNl0pLCBsb2FkLmltYWdlKHBpbWdbN10pLCBsb2FkLmltYWdlKHBpbWdbOF0pLAogICAgICAgICAgICAgICAgbG9hZC5pbWFnZShwaW1nWzldKSwgbG9hZC5pbWFnZShwaW1nWzEwXSksIGxvYWQuaW1hZ2UocGltZ1sxMV0pLCBsb2FkLmltYWdlKHBpbWdbMTJdKSwKICAgICAgICAgICAgICAgIGxvYWQuaW1hZ2UocGltZ1sxM10pLCBsb2FkLmltYWdlKHBpbWdbMTRdKSkKI2ltbGlzdCA9IGFzLmRhdGEuZnJhbWUoaW1saXN0KQpmb3IgKGkgaW4gMToxNCkgewogIHBsb3QoaW1saXN0W2ldKQp9CmBgYAoKIyMjIFdlYXRoZXIKCmBgYHtyfQpnZ3Bsb3QodHJhaW4sIGFlcyh4ID0gZGF0ZSwgeSA9IHdlYXRoZXIpKSArIGdlb21fcG9pbnQoKQpgYGAKCiMjIyBTbW9rZQoKYGBge3J9CnNtb2tlID0gc3Vic2V0KHRyYWluLCB0cmFpbiRzbW9rZSA9PSBUUlVFKQpzaW1nID0gcmVwKE5BLCBucm93KHNtb2tlKSkKZm9yIChpIGluIDE6bnJvdyhzbW9rZSkpIHsKICBzaW1nW2ldID0gc3RyaV9yZXBsYWNlX2FsbF9maXhlZChwYXN0ZSgiUGVsaVBob3RvczFGb2xkZXIvIiwgc21va2UkZmlsZVtpXSwgY29sbGFwc2UgPSAiIiksIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHBhdHRlcm4gPSAiICIsIHJlcGxhY2VtZW50ID0gIiIpCn0KCmltbGlzdCA9IGltbGlzdChsb2FkLmltYWdlKHNpbWdbMV0pLCBsb2FkLmltYWdlKHNpbWdbMl0pKQojaW1saXN0ID0gYXMuZGF0YS5mcmFtZShpbWxpc3QpCmZvciAoaSBpbiAxOjIpIHsKICBwbG90KGltbGlzdFtpXSkKfQpgYGAKCiMjIyBEaXN0dXJiYW5jZQoKYGBge3J9CmRmID0gbmEub21pdCh0cmFpblt0cmFpbiRkaXN0dXJiYW5jZSA9PSBUUlVFLGMoImRhdGUiLCAiZGlzdHVyYmFuY2UiKV0pCmdncGxvdChkZiwgYWVzKHggPSBkYXRlLCB5ID0gZGlzdHVyYmFuY2UpKSArIAogIGdlb21fcG9pbnQoYWxwaGEgPSAwLjUpCmBgYAoKIyMjIE5vdGVzCgpgYWJhbmRvbm1lbnRgLCBgc3RhZ2VvZm5lc3RpbmdgIGFuZCBgcHJlZGF0b3JzYCBhcmUgYWxsIGZhbHNlIG9yIE5BIGFuZCBhcmUsIHRoZXJlZm9yZSwgb2YgbGl0dGxlIHVzZS4gVGh1cyBJIHJlbW92ZWQgdGhlbSBmcm9tIHRoZSBzZXQgZm9yIGFuYWx5c2lzIGluIHB5dGhvbi4gCgoKCgoKCgoKCg==